rm(list=ls())
gc()

## ESTIMATE TSMART PID MODELS


## LOAD PACKAGES
library(data.table)
library(lfe)
library(stringr)


data = fread('panel-analysis-2012-2016.csv.gz',select = c('DemDiff','RepDiff','DemDiff2','RepDiff2','Party_2020','DemSpExpDiff_nohh', 'RepSpExpDiff_nohh','DemSpExp_nohh_year1','RepSpExp_nohh_year1','Age_year1','Race','Gender',
                                                                            'ZipCode','Married_year1','countyfips','Party_year1','hh.n.adj_year1','hh.n.adj_year2',
                                                                            'hh.d.adj_year1','hh.d.adj_year2','hh.r.adj_year1','hh.r.adj_year2',
                                                                            'State',  'WhiteBlockGroupDiff','MarriageDiff','AgeBlockGroupDiff','RegsBlockGroupDiff',
                                                                           'HHIncomeBlockGroupDiff' , 'CollegeBlockGroupDiff' , 'HomeownerBlockGroupDiff' , 'YearBuiltBlockGroupDiff' , 
                                                                           'DriveWorkBlockGroupDiff' , 'EmplBlockGroupDiff' , 'HouseValueBlockGroupDiff','Party_2001','Party_2005','Party_2007','Party_2008','Party_2009',
                                                                           'DemSpExp_nohh_2001','DemSpExp_nohh_2005','DemSpExp_nohh_2007','DemSpExp_nohh_2008','DemSpExp_nohh_2009',
                                                                           'RepSpExp_nohh_2001','RepSpExp_nohh_2005','RepSpExp_nohh_2007','RepSpExp_nohh_2008','RepSpExp_nohh_2009'))


# Make year2 party variable
data[DemDiff==1 | Party_year1=='Democrat'&DemDiff==0,Party_2016:='Democrat']
data[RepDiff==1 | Party_year1=='Republican'&RepDiff==0,Party_2016:='Republican']
data[is.na(Party_2016), Party_2016:='Other']
data[Party_2020=='',Party_2020:=NA]



data[!Party_year1%in%c('Democrat','Republican'), Party_year1:='Other']
data[countyfips=='',countyfips:=NA]
data[ZipCode=='',ZipCode:=NA]
data[Gender=='',Gender:=NA]

# Create household composition variables
data[,hh.n.diff:=hh.n.adj_year2-hh.n.adj_year1]
data[,hh.d.diff:=hh.d.adj_year2-hh.d.adj_year1]
data[,hh.r.diff:=hh.r.adj_year2-hh.r.adj_year1]

# Make downstream party change variable
data[,DemDiff2:=as.numeric(Party_2020=='Democrat')-as.numeric(Party_2016=='Democrat')]
data[,RepDiff2:=as.numeric(Party_2020=='Republican')-as.numeric(Party_2016=='Republican')]


# Make party pre-period variables
data[State=='CA',pid_pre := ifelse(Party_2009=='',NA,Party_2009)]
data[State=='FL',pid_pre := ifelse(Party_2009=='',NA,Party_2009)]
data[State=='NY',pid_pre := ifelse(Party_2008=='',NA,Party_2008)]
data[State=='NC',pid_pre := ifelse(Party_2009=='',NA,Party_2009)]
data[State=='KS',pid_pre := ifelse(Party_2009=='',NA,Party_2009)]

data[State=='CA',pid_pre1 := ifelse(Party_2005=='',NA,Party_2005)]
data[State=='CA',pid_pre2 := ifelse(Party_2007=='',NA,Party_2007)]
data[State=='CA',pid_pre3 := ifelse(Party_2009=='',NA,Party_2009)]

data[State=='FL',pid_pre1 := ifelse(Party_2007=='',NA,Party_2007)]
data[State=='FL',pid_pre2 := ifelse(Party_2007=='',NA,Party_2007)]
data[State=='FL',pid_pre3 := ifelse(Party_2009=='',NA,Party_2009)]

data[State=='NY',pid_pre1 := ifelse(Party_2001=='',NA,Party_2001)]
data[State=='NY',pid_pre2 := ifelse(Party_2001=='',NA,Party_2001)]
data[State=='NY',pid_pre3 := ifelse(Party_2008=='',NA,Party_2008)]

data[State=='KS',pid_pre1 := ifelse(Party_2008=='',NA,Party_2008)]
data[State=='KS',pid_pre2 := ifelse(Party_2008=='',NA,Party_2008)]
data[State=='KS',pid_pre3 := ifelse(Party_2008=='',NA,Party_2008)]

data[State=='NC',pid_pre1 := ifelse(Party_2009=='',NA,Party_2009)]
data[State=='NC',pid_pre2 := ifelse(Party_2009=='',NA,Party_2009)]
data[State=='NC',pid_pre3 := ifelse(Party_2009=='',NA,Party_2009)]




# make partisan exposure pre-period variables
data[State=='CA',DemSpExp_nohh_pre := DemSpExp_nohh_2009]
data[State=='FL',DemSpExp_nohh_pre := DemSpExp_nohh_2009]
data[State=='NY',DemSpExp_nohh_pre := DemSpExp_nohh_2008]
data[State=='NC',DemSpExp_nohh_pre := DemSpExp_nohh_2009]
data[State=='KS',DemSpExp_nohh_pre := DemSpExp_nohh_2009]

data[State=='CA',DemSpExp_nohh_pre1 := DemSpExp_nohh_2005]
data[State=='CA',DemSpExp_nohh_pre2 := DemSpExp_nohh_2007]
data[State=='CA',DemSpExp_nohh_pre3 := DemSpExp_nohh_2009]

data[State=='FL',DemSpExp_nohh_pre1 := DemSpExp_nohh_2007]
data[State=='FL',DemSpExp_nohh_pre2 := DemSpExp_nohh_2007]
data[State=='FL',DemSpExp_nohh_pre3 := DemSpExp_nohh_2009]

data[State=='NY',DemSpExp_nohh_pre1 := DemSpExp_nohh_2001]
data[State=='NY',DemSpExp_nohh_pre2 := DemSpExp_nohh_2001]
data[State=='NY',DemSpExp_nohh_pre3 := DemSpExp_nohh_2008]

data[State=='KS',DemSpExp_nohh_pre1 := DemSpExp_nohh_2008]
data[State=='KS',DemSpExp_nohh_pre2 := DemSpExp_nohh_2008]
data[State=='KS',DemSpExp_nohh_pre3 := DemSpExp_nohh_2008]

data[State=='NC',DemSpExp_nohh_pre1 := DemSpExp_nohh_2009]
data[State=='NC',DemSpExp_nohh_pre2 := DemSpExp_nohh_2009]
data[State=='Nc',DemSpExp_nohh_pre3 := DemSpExp_nohh_2009]





data[State=='CA',RepSpExp_nohh_pre1 := RepSpExp_nohh_2005]
data[State=='CA',RepSpExp_nohh_pre2 := RepSpExp_nohh_2007]
data[State=='CA',RepSpExp_nohh_pre3 := RepSpExp_nohh_2009]

data[State=='FL',RepSpExp_nohh_pre1 := RepSpExp_nohh_2007]
data[State=='FL',RepSpExp_nohh_pre2 := RepSpExp_nohh_2007]
data[State=='FL',RepSpExp_nohh_pre3 := RepSpExp_nohh_2009]

data[State=='NY',RepSpExp_nohh_pre1 := RepSpExp_nohh_2001]
data[State=='NY',RepSpExp_nohh_pre2 := RepSpExp_nohh_2001]
data[State=='NY',RepSpExp_nohh_pre3 := RepSpExp_nohh_2008]

data[State=='KS',RepSpExp_nohh_pre1 := RepSpExp_nohh_2008]
data[State=='KS',RepSpExp_nohh_pre2 := RepSpExp_nohh_2008]
data[State=='KS',RepSpExp_nohh_pre3 := RepSpExp_nohh_2008]

data[State=='NC',RepSpExp_nohh_pre1 := RepSpExp_nohh_2009]
data[State=='NC',RepSpExp_nohh_pre2 := RepSpExp_nohh_2009]
data[State=='NC',RepSpExp_nohh_pre3 := RepSpExp_nohh_2009]



#
data = data[!is.na(DemSpExp_nohh_pre3)&!is.na(RepSpExp_nohh_pre3) & 
              !is.na(pid_pre3) & !is.na(pid_pre2) & !is.na(pid_pre1) &
              !is.na(DemSpExp_nohh_pre2)&!is.na(RepSpExp_nohh_pre2) &
              !is.na(DemSpExp_nohh_pre1)&!is.na(RepSpExp_nohh_pre1) ]



data[,DemSpPre1:=round(DemSpExp_nohh_pre1*100)]
data[,RepSpPre1:=round(RepSpExp_nohh_pre1*100)]
data[,DemSpPre2:=round(DemSpExp_nohh_pre2*100)]
data[,RepSpPre2:=round(RepSpExp_nohh_pre2*100)]
data[,DemSpPre3:=round(DemSpExp_nohh_pre3*100)]
data[,RepSpPre3:=round(RepSpExp_nohh_pre3*100)]



# split data

dems = data[Party_year1=='Democrat' & Party_2016=='Democrat']
reps = data[Party_year1=='Republican' & Party_2016=='Republican']
oths = data[Party_year1=='Other' & Party_2016=='Other']



rm(data)
gc()

# construct grouping variables based on state, year1 treatment decile, year1 household composition variables, and party and partisan exposure pretrend variables
# we can drop rows where any of these are missing


# Drop rows where grouping variables from main spec are missing
dems = dems[!is.na(Age_year1) & !is.na(Gender) & !is.na(Race)  &!is.na(ZipCode)&!is.na(Married_year1)]
reps = reps[!is.na(Age_year1) & !is.na(Gender) & !is.na(Race)& !is.na(ZipCode)&!is.na(Married_year1)]
oths = oths[!is.na(Age_year1) & !is.na(Gender) & !is.na(Race) &  !is.na(ZipCode)&!is.na(Married_year1)]

dems[,DemSpDecile:=cut(DemSpExp_nohh_year1,breaks=unique(as.numeric(quantile(DemSpExp_nohh_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
reps[,DemSpDecile:=cut(DemSpExp_nohh_year1,breaks=unique(as.numeric(quantile(DemSpExp_nohh_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
oths[,DemSpDecile:=cut(DemSpExp_nohh_year1,breaks=unique(as.numeric(quantile(DemSpExp_nohh_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]

# republican exposure
dems[,RepSpDecile:=cut(RepSpExp_nohh_year1,breaks=unique(as.numeric(quantile(RepSpExp_nohh_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
reps[,RepSpDecile:=cut(RepSpExp_nohh_year1,breaks=unique(as.numeric(quantile(RepSpExp_nohh_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
oths[,RepSpDecile:=cut(RepSpExp_nohh_year1,breaks=unique(as.numeric(quantile(RepSpExp_nohh_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]



#####
# make groups
# spatial seg treatment
dems[,GroupSpDemExp:=paste( DemSpDecile,
                            DemSpPre1,DemSpPre2,DemSpPre3,
                            pid_pre1, pid_pre2, pid_pre3,hh.n.adj_year1,hh.d.adj_year1, sep ='_')]
reps[,GroupSpDemExp:=paste(DemSpDecile,
                           DemSpPre1,DemSpPre2,DemSpPre3,
                           pid_pre1, pid_pre2, pid_pre3,hh.n.adj_year1,hh.d.adj_year1, sep ='_')]
oths[,GroupSpDemExp:=paste(DemSpDecile,
                           DemSpPre1,DemSpPre2,DemSpPre3,
                           pid_pre1, pid_pre2, pid_pre3,hh.n.adj_year1,hh.d.adj_year1, sep ='_')]

dems[,GroupSpRepExp:=paste(RepSpDecile,RepSpPre1,RepSpPre2,RepSpPre3,pid_pre1, pid_pre2, pid_pre3, hh.n.adj_year1,hh.r.adj_year1, sep ='_')]
reps[,GroupSpRepExp:=paste(RepSpDecile,RepSpPre1,RepSpPre2,RepSpPre3,pid_pre1, pid_pre2, pid_pre3,hh.n.adj_year1,hh.r.adj_year1, sep ='_')]
oths[,GroupSpRepExp:=paste(RepSpDecile,RepSpPre1,RepSpPre2,RepSpPre3,pid_pre1, pid_pre2, pid_pre3, hh.n.adj_year1,hh.r.adj_year1, sep ='_')]


# estimate models

# spatial seg treatment
ModelDemSpExpDems = summary(felm(DemDiff2 ~ DemSpExpDiff_nohh + hh.d.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                   HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                   DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff|GroupSpDemExp|0|countyfips, data = dems[!is.na(DemSpExp_nohh_year1)&!is.na(countyfips)]), robust =T)
ModelDemSpExpReps = summary(felm(DemDiff2 ~ DemSpExpDiff_nohh+ hh.d.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                   HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                   DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff|GroupSpDemExp|0|countyfips, data = reps[!is.na(DemSpExp_nohh_year1)&!is.na(countyfips)]), robust =T)
ModelDemSpExpOths = summary(felm(DemDiff2 ~ DemSpExpDiff_nohh+ hh.d.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                   HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                   DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff|GroupSpDemExp|0|countyfips, data = oths[!is.na(DemSpExp_nohh_year1)&!is.na(countyfips)]), robust =T)

ModelRepSpExpDems = summary(felm(RepDiff2 ~ RepSpExpDiff_nohh+ hh.r.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                   HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                   DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff|GroupSpRepExp|0|countyfips, data = dems[!is.na(RepSpExp_nohh_year1)&!is.na(countyfips)]), robust =T)
ModelRepSpExpReps = summary(felm(RepDiff2 ~ RepSpExpDiff_nohh+ hh.r.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                   HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                   DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff|GroupSpRepExp|0|countyfips, data = reps[!is.na(RepSpExp_nohh_year1)&!is.na(countyfips)]), robust =T)
ModelRepSpExpOths = summary(felm(RepDiff2 ~ RepSpExpDiff_nohh+ hh.r.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                   HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                   DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff|GroupSpRepExp|0|countyfips, data = oths[!is.na(RepSpExp_nohh_year1)&!is.na(countyfips)]), robust =T)



save(ModelDemSpExpDems, ModelDemSpExpReps, ModelDemSpExpOths, 
     ModelRepSpExpDems, ModelRepSpExpReps, ModelRepSpExpOths,
     
     
     file = paste0('results/future-results-2012-2016-pretrend.Rdata'))

